################################################################################
## LOAD PACKAGES
################################################################################

library(ggplot2)
library(igraph)
library(ggraph)
library(patchwork)
library(dplyr)
library(readxl)
library(ggrepel)
library(tidygraph)
library(purrr)

################################################################################
## SET WORKING DIRECTORY
################################################################################

getwd()

#setwd() # Set working directory here

################################################################################
## EXPLAINED + CUMULATIVE VARIANCE VISUALIZATION (EFA)
################################################################################

# Read Excel files and assign each dataset to a VAA group
general.nl.efa.var.df <- read_excel("files/general_nl_efa_var_df.xlsx")
general.nl.efa.var.df$vaa <- "General – Dutch"

general.fr.efa.var.df <- read_excel("files/general_fr_efa_var_df.xlsx")
general.fr.efa.var.df$vaa <- "General – French"

youth.nl.efa.var.df <- read_excel("files/youth_nl_efa_var_df.xlsx")
youth.nl.efa.var.df$vaa <- "Youth – Dutch"

youth.fr.efa.var.df <- read_excel("files/youth_fr_efa_var_df.xlsx")
youth.fr.efa.var.df$vaa <- "Youth – French"

brussels.nl.efa.var.df <- read_excel("files/brussels_nl_efa_var_df.xlsx")
brussels.nl.efa.var.df$vaa <- "Brussels – Dutch"

brussels.fr.efa.var.df <- read_excel("files/brussels_fr_efa_var_df.xlsx")
brussels.fr.efa.var.df$vaa <- "Brussels – French"

flanders.nl.efa.var.df <- read_excel("files/flanders_nl_efa_var_df.xlsx")
flanders.nl.efa.var.df$vaa <- "Flanders"

wallonia.fr.efa.var.df <- read_excel("files/wallonia_fr_efa_var_df.xlsx")
wallonia.fr.efa.var.df$vaa <- "Wallonia"

federal.nl.efa.var.df <- read_excel("files/federal_nl_efa_var_df.xlsx")
federal.nl.efa.var.df$vaa <- "Federal – Dutch"

federal.fr.efa.var.df <- read_excel("files/federal_fr_efa_var_df.xlsx")
federal.fr.efa.var.df$vaa <- "Federal – French"

eu.nl.efa.var.df <- read_excel("files/eu_nl_efa_var_df.xlsx")
eu.nl.efa.var.df$vaa <- "EU – Dutch"

eu.fr.efa.var.df <- read_excel("files/eu_fr_efa_var_df.xlsx")
eu.fr.efa.var.df$vaa <- "EU – French"

# Combine into one dataframe
efa.var.df <- rbind(general.nl.efa.var.df, general.fr.efa.var.df,
                    youth.nl.efa.var.df, youth.fr.efa.var.df,
                    brussels.nl.efa.var.df, brussels.fr.efa.var.df,
                    flanders.nl.efa.var.df, wallonia.fr.efa.var.df,
                    federal.nl.efa.var.df, federal.fr.efa.var.df,
                    eu.nl.efa.var.df, eu.fr.efa.var.df)

# Convert to percentage
efa.var.df$Proportion_Variance <- efa.var.df$Proportion_Variance * 100

# Define custom order for facets
vaa_order <- c("General – Dutch", "General – French", "Youth – Dutch", "Youth – French",
               "Brussels – Dutch", "Brussels – French", "Flanders", "Wallonia",
               "Federal – Dutch", "Federal – French", "EU – Dutch", "EU – French")
efa.var.df$vaa <- factor(efa.var.df$vaa, levels = vaa_order)

# Calculate average and cumulative explained variance per VAA
variance.summary <- efa.var.df %>%
  group_by(vaa) %>%
  summarise(
    avg_variance = mean(Proportion_Variance),
    cumulative_variance = sum(Proportion_Variance)
  ) %>%
  ungroup()

# Merge summary data back into main dataframe for annotation
efa.var.df <- left_join(efa.var.df, variance.summary, by = "vaa")

# Calculate overall averages
overall_avg_variance <- round(mean(variance.summary$avg_variance), 2)
overall_cum_variance <- round(mean(variance.summary$cumulative_variance), 2)

# Create png output
png("figs/efa_var_g.png", units = "in", width = 5, height = 5, res = 1200)

# Plot
efa.var.g <- ggplot(efa.var.df, aes(x = Factor, y = Proportion_Variance, fill = Factor)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  geom_text(aes(label = round(Proportion_Variance, 1)), 
            vjust = -0.3, size = 2.5, color = "black") +
  # Add per-facet annotations: average and cumulative variance
  geom_text(data = distinct(efa.var.df, vaa, avg_variance, cumulative_variance), 
            aes(x = 1, y = max(efa.var.df$Proportion_Variance) + 5,
                label = paste0("Avg. var. = ", round(avg_variance, 1), "%\nCum. var. = ", round(cumulative_variance, 1), "%")), 
            inherit.aes = FALSE, size = 2.5, hjust = 0, vjust = 0.22) +
  facet_wrap(~ vaa, ncol = 4) +
  # Set y-axis scale from 0 to 25 with breaks of 5
  scale_y_continuous(limits = c(0, 25), breaks = seq(0, 30, by = 5)) +
  labs(
    y = "Explained Variance (%)",
    x = "Factor",
    caption = paste0("Average explained variance = ", overall_avg_variance, "%\n",
                     "Average cumulative variance = ", overall_cum_variance, "%")
  ) +
  theme_minimal(base_size = 10) +
  scale_fill_brewer(palette = "Set2") +
  theme(
    axis.title.x = element_text(size = 9, margin = margin(t = 8)),
    axis.title.y = element_text(size = 9, margin = margin(r = 8)),
    axis.text = element_text(size = 8),
    strip.text = element_text(size = 8, face = "bold"),
    plot.title = element_blank(),
    plot.caption = element_text(size = 8, hjust = 0),
    # Customize grid lines
    panel.grid.major = element_line(size = 0.15, color = "gray90"),  # Thinner and lighter gray for major grid lines
    panel.grid.minor = element_line(size = 0.15, color = "gray90")   # Thinner and lighter gray for minor grid lines
  )

# Draw the plot
efa.var.g

# Close device
dev.off()

################################################################################
## EXPLAINED + CUMULATIVE VARIANCE VISUALIZATION (IRT)
################################################################################

# Read Excel files and assign each dataset to a VAA group
general.nl.irt.var.df <- read_excel("files/general_nl_irt_var_df.xlsx")
general.nl.irt.var.df$vaa <- "General – Dutch"

general.fr.irt.var.df <- read_excel("files/general_fr_irt_var_df.xlsx")
general.fr.irt.var.df$vaa <- "General – French"

youth.nl.irt.var.df <- read_excel("files/youth_nl_irt_var_df.xlsx")
youth.nl.irt.var.df$vaa <- "Youth – Dutch"

youth.fr.irt.var.df <- read_excel("files/youth_fr_irt_var_df.xlsx")
youth.fr.irt.var.df$vaa <- "Youth – French"

brussels.nl.irt.var.df <- read_excel("files/brussels_nl_irt_var_df.xlsx")
brussels.nl.irt.var.df$vaa <- "Brussels – Dutch"

brussels.fr.irt.var.df <- read_excel("files/brussels_fr_irt_var_df.xlsx")
brussels.fr.irt.var.df$vaa <- "Brussels – French"

flanders.nl.irt.var.df <- read_excel("files/flanders_nl_irt_var_df.xlsx")
flanders.nl.irt.var.df$vaa <- "Flanders"

wallonia.fr.irt.var.df <- read_excel("files/wallonia_fr_irt_var_df.xlsx")
wallonia.fr.irt.var.df$vaa <- "Wallonia"

federal.nl.irt.var.df <- read_excel("files/federal_nl_irt_var_df.xlsx")
federal.nl.irt.var.df$vaa <- "Federal – Dutch"

federal.fr.irt.var.df <- read_excel("files/federal_fr_irt_var_df.xlsx")
federal.fr.irt.var.df$vaa <- "Federal – French"

eu.nl.irt.var.df <- read_excel("files/eu_nl_irt_var_df.xlsx")
eu.nl.irt.var.df$vaa <- "EU – Dutch"

eu.fr.irt.var.df <- read_excel("files/eu_fr_irt_var_df.xlsx")
eu.fr.irt.var.df$vaa <- "EU – French"

# Combine into one dataframe
irt.var.df <- rbind(general.nl.irt.var.df, general.fr.irt.var.df,
                    youth.nl.irt.var.df, youth.fr.irt.var.df,
                    brussels.nl.irt.var.df, brussels.fr.irt.var.df,
                    flanders.nl.irt.var.df, wallonia.fr.irt.var.df,
                    federal.nl.irt.var.df, federal.fr.irt.var.df,
                    eu.nl.irt.var.df, eu.fr.irt.var.df)

# Convert to percentage
irt.var.df$Proportion_Variance <- irt.var.df$Proportion_Variance * 100

# Define custom order for facets
vaa_order <- c("General – Dutch", "General – French", "Youth – Dutch", "Youth – French",
               "Brussels – Dutch", "Brussels – French", "Flanders", "Wallonia",
               "Federal – Dutch", "Federal – French", "EU – Dutch", "EU – French")
irt.var.df$vaa <- factor(irt.var.df$vaa, levels = vaa_order)

# Calculate average and cumulative explained variance per VAA
variance.summary <- irt.var.df %>%
  group_by(vaa) %>%
  summarise(
    avg_variance = mean(Proportion_Variance),
    cumulative_variance = sum(Proportion_Variance)
  ) %>%
  ungroup()

# Merge summary data back into main dataframe for annotation
irt.var.df <- left_join(irt.var.df, variance.summary, by = "vaa")

# Calculate overall averages
overall_avg_variance <- round(mean(variance.summary$avg_variance), 2)
overall_cum_variance <- round(mean(variance.summary$cumulative_variance), 2)

# Create png output
png("figs/irt_var_g.png", units = "in", width = 5, height = 5, res = 1200)

# Plot
irt.var.g <- ggplot(irt.var.df, aes(x = Factor, y = Proportion_Variance, fill = Factor)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  geom_text(aes(label = round(Proportion_Variance, 1)), 
            vjust = -0.3, size = 2.5, color = "black") +
  # Add per-facet annotations: average and cumulative variance
  geom_text(data = distinct(irt.var.df, vaa, avg_variance, cumulative_variance), 
            aes(x = 1, y = max(irt.var.df$Proportion_Variance) + 5,
                label = paste0("Avg. var. = ", round(avg_variance, 1), "%\nCum. var. = ", round(cumulative_variance, 1), "%")), 
            inherit.aes = FALSE, size = 2.5, hjust = 0, vjust = 0.22) +
  facet_wrap(~ vaa, ncol = 4) +
  # Set y-axis scale from 0 to 25 with breaks of 5
  scale_y_continuous(limits = c(0, 25), breaks = seq(0, 30, by = 5)) +
  labs(
    y = "Explained Variance (%)",
    x = "Factor",
    caption = paste0("Average explained variance = ", overall_avg_variance, "%\n",
                     "Average cumulative variance = ", overall_cum_variance, "%")
  ) +
  theme_minimal(base_size = 10) +
  scale_fill_brewer(palette = "Set2") +
  theme(
    axis.title.x = element_text(size = 9, margin = margin(t = 8)),
    axis.title.y = element_text(size = 9, margin = margin(r = 8)),
    axis.text = element_text(size = 8),
    strip.text = element_text(size = 8, face = "bold"),
    plot.title = element_blank(),
    plot.caption = element_text(size = 8, hjust = 0),
    # Customize grid lines
    panel.grid.major = element_line(size = 0.15, color = "gray90"),  # Thinner and lighter gray for major grid lines
    panel.grid.minor = element_line(size = 0.15, color = "gray90")   # Thinner and lighter gray for minor grid lines
  )

# Draw the plot
irt.var.g

# Close device
dev.off()

################################################################################
## INTER-FACTOR CORRELATIONS (EFA)
################################################################################

# Read Excel files (EFA)
general.nl.efa.inter.df <- read_excel("files/general_nl_efa_inter_correlations.xlsx")
general.nl.efa.inter.df$vaa <- "General – Dutch"

general.fr.efa.inter.df <- read_excel("files/general_fr_efa_inter_correlations.xlsx")
general.fr.efa.inter.df$vaa <- "General – French"

youth.nl.efa.inter.df <- read_excel("files/youth_nl_efa_inter_correlations.xlsx")
youth.nl.efa.inter.df$vaa <- "Youth – Dutch"

youth.fr.efa.inter.df <- read_excel("files/youth_fr_efa_inter_correlations.xlsx")
youth.fr.efa.inter.df$vaa <- "Youth – French"

brussels.nl.efa.inter.df <- read_excel("files/brussels_nl_efa_inter_correlations.xlsx")
brussels.nl.efa.inter.df$vaa <- "Brussels – Dutch"

brussels.fr.efa.inter.df <- read_excel("files/brussels_fr_efa_inter_correlations.xlsx")
brussels.fr.efa.inter.df$vaa <- "Brussels – French"

flanders.nl.efa.inter.df <- read_excel("files/flanders_nl_efa_inter_correlations.xlsx")
flanders.nl.efa.inter.df$vaa <- "Flanders"

wallonia.fr.efa.inter.df <- read_excel("files/wallonia_fr_efa_inter_correlations.xlsx")
wallonia.fr.efa.inter.df$vaa <- "Wallonia"

federal.nl.efa.inter.df <- read_excel("files/federal_nl_efa_inter_correlations.xlsx")
federal.nl.efa.inter.df$vaa <- "Federal – Dutch"

federal.fr.efa.inter.df <- read_excel("files/federal_fr_efa_inter_correlations.xlsx")
federal.fr.efa.inter.df$vaa <- "Federal – French"

eu.nl.efa.inter.df <- read_excel("files/eu_nl_efa_inter_correlations.xlsx")
eu.nl.efa.inter.df$vaa <- "EU – Dutch"

eu.fr.efa.inter.df <- read_excel("files/eu_fr_efa_inter_correlations.xlsx")
eu.fr.efa.inter.df$vaa <- "EU – French"

efa.inter.df <- rbind(general.nl.efa.inter.df, general.fr.efa.inter.df,
                      youth.nl.efa.inter.df, youth.fr.efa.inter.df,
                      brussels.nl.efa.inter.df, brussels.fr.efa.inter.df,
                      flanders.nl.efa.inter.df, wallonia.fr.efa.inter.df,
                      federal.nl.efa.inter.df, federal.fr.efa.inter.df,
                      eu.nl.efa.inter.df, eu.fr.efa.inter.df)

efa.inter3.df <- rbind(brussels.nl.efa.inter.df,
                       flanders.nl.efa.inter.df, wallonia.fr.efa.inter.df,
                       eu.nl.efa.inter.df, eu.fr.efa.inter.df)

efa.inter4.df <- rbind(general.nl.efa.inter.df, general.fr.efa.inter.df,
                       youth.nl.efa.inter.df, youth.fr.efa.inter.df,
                       brussels.fr.efa.inter.df,
                       federal.nl.efa.inter.df, federal.fr.efa.inter.df)

#efa.inter4.df$Correlation <- abs(efa.inter4.df$Correlation)

efa.inter4.df.list <- split(efa.inter4.df, efa.inter4.df$vaa)

efa.list <- as.list(efa.inter4.df.list)

# Convert vctrs_list_of to regular list (if needed)

efa.matrices <- lapply(efa.list, function(df) {
  df <- df %>%
    select(Factor1, Factor2, Correlation) %>%
    distinct()
  
  # Get full list of factors used
  factors_all <- sort(unique(c(df$Factor1, df$Factor2)))
  
  # Create an empty correlation matrix
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))
  
  # Fill matrix with correlations
  for (i in 1:nrow(df)) {
    f1 <- df$Factor1[i]
    f2 <- df$Factor2[i]
    val <- df$Correlation[i]
    mat[f1, f2] <- val
    mat[f2, f1] <- val  # Ensure symmetry
  }
  
  # Set diagonal to 1
  diag(mat) <- 1
  
  return(mat)
})

# --------------------------
# Dataset names and metadata
# --------------------------

datasets <- c("Brussels – French",
              "Federal – Dutch", "Federal – French",
              "General – Dutch", "General – French",
              "Youth – Dutch", "Youth – French")

# ------------------------------
# 2.1. Convert Correlation Matrices to DataFrame
# ------------------------------

# Function to convert a correlation matrix to a dataframe
cor_matrix_to_df <- function(cor_mat, dataset_name) {
  cor_df <- as.data.frame(as.table(cor_mat))  # Convert to long format
  cor_df$Dataset <- dataset_name  # Add dataset name
  colnames(cor_df) <- c("Factor1", "Factor2", "Correlation", "Dataset")
  cor_df <- cor_df[cor_df$Factor1 != cor_df$Factor2, ]  # Remove self-correlations
  return(cor_df)
}

# Apply the conversion function to each correlation matrix
cor_df_list <- lapply(names(efa.matrices), function(ds) {
  cor_matrix_to_df(efa.matrices[[ds]], ds)
})

# Combine into one dataframe
cor_df <- do.call(rbind, cor_df_list)

# -----------------------
# 4. Plot: Correlation Networks
# -----------------------

make_graph_plot_from_df <- function(cor_df, dataset_name, color) {
  # Remove duplicated edges like F1-F2 / F2-F1
  cor_df <- cor_df %>%
    mutate(pair = pmap_chr(list(Factor1, Factor2), ~ paste(sort(c(.x, .y)), collapse = "-"))) %>%
    distinct(pair, .keep_all = TRUE) %>%
    select(-pair)
  
  # Create graph
  graph <- graph_from_data_frame(cor_df, directed = FALSE)
  E(graph)$Correlation <- cor_df$Correlation
  layout <- ggraph::create_layout(graph, layout = "kk")
  
  # Prepare edge label positions
  edge_df <- as.data.frame(get.edgelist(graph))
  colnames(edge_df) <- c("from", "to")
  edge_df$Correlation <- E(graph)$Correlation
  
  edge_df <- edge_df %>%
    left_join(layout[, c("name", "x", "y")], by = c("from" = "name")) %>%
    rename(x_from = x, y_from = y) %>%
    left_join(layout[, c("name", "x", "y")], by = c("to" = "name")) %>%
    rename(x_to = x, y_to = y) %>%
    mutate(x_mid = (x_from + x_to) / 2,
           y_mid = (y_from + y_to) / 2)
  
  # Optional: add buffer to layout limits to prevent cut-off
  xlim_range <- range(layout$x) + c(-0.2, 0.2)
  ylim_range <- range(layout$y) + c(-0.2, 0.2)
  
  # Final plot
  p <- ggraph(layout, expand = TRUE) +
    geom_edge_link(aes(width = abs(Correlation),
                       color = ifelse(abs(Correlation) >= 0.5, "#45b6fe",
                                      ifelse(abs(Correlation) >= 0.3, "red", "grey"))),
                   alpha = 0.8) +
    
    geom_text_repel(data = edge_df,
                    aes(x = x_mid, y = y_mid, label = sprintf("%.2f", Correlation)),
                    size = 7, color = "black",
                    segment.color = NA, max.overlaps = Inf) +
    
    geom_node_point(size = 17, color = color) +
    geom_node_text(aes(label = name), color = "white", size = 5) +
    
    xlim(xlim_range) +
    ylim(ylim_range) +
    
    scale_edge_width(limits = c(0, 0.6), range = c(0.1, 4)) +
    scale_edge_color_identity() +
    scale_linetype_manual(values = c("dotted" = "dotted", "solid" = "solid")) +
    
    labs(title = dataset_name) +
    theme_void() +
    theme(
      plot.title = element_text(size = 24, margin = margin(b = 15)),
      plot.margin = margin(t = 40, r = 40, b = 40, l = 40),
      legend.position = "none"
    )
  
  return(p)
}

# Define colors per language for a bit of variation
node_colors <- rep(c("black"), length.out = length(datasets))

# Generate all 12 network plots
network_plots_from_df <- mapply(
  FUN = make_graph_plot_from_df,
  cor_df = cor_df_list,
  dataset_name = datasets,
  color = node_colors,
  SIMPLIFY = FALSE
)

# -----------------------
# 5. Combine Network Plots
# -----------------------

network_grid_from_df <- wrap_plots(network_plots_from_df, ncol = 3) +
  plot_layout() &
  theme(plot.margin = margin(t = 5, b = 20))

# -----------------------
# 6. (Optional) Save Outputs
# -----------------------

# Save the combined network plot grid
ggsave("figs/network_plots_grid_from_df.png", network_grid_from_df, width = 14, height = 14, dpi = 300)

datasets <- c("Brussels – French",
              "Federal – Dutch", "Federal – French",
              "General – Dutch", "General – French",
              "Youth – Dutch", "Youth – French",
              "Brussels – Dutch", "EU – Dutch", "EU – French", 
              "Flanders", "Wallonia")

#efa.inter3.df$Correlation <- abs(efa.inter3.df$Correlation)  # Apply abs() if needed

efa.inter3.df.list <- split(efa.inter3.df, efa.inter3.df$vaa)

efa.list.3f <- as.list(efa.inter3.df.list)

# Rebuild matrices for 3-factor sets
efa.matrices.3f <- lapply(efa.list.3f, function(df) {
  df <- df %>%
    select(Factor1, Factor2, Correlation) %>%
    distinct()
  
  factors_all <- sort(unique(c(df$Factor1, df$Factor2)))
  
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))
  
  for (i in 1:nrow(df)) {
    f1 <- df$Factor1[i]
    f2 <- df$Factor2[i]
    val <- df$Correlation[i]
    mat[f1, f2] <- val
    mat[f2, f1] <- val
  }
  
  diag(mat) <- 1
  return(mat)
})

# Merge both into one list
efa.matrices.full <- c(efa.matrices, efa.matrices.3f)

# Update cor_df_list and network_plots_from_df using the full matrix list
cor_df_list <- lapply(names(efa.matrices.full), function(ds) {
  cor_matrix_to_df(efa.matrices.full[[ds]], ds)
})

network_plots_from_df <- mapply(
  FUN = make_graph_plot_from_df,
  cor_df = cor_df_list,
  dataset_name = names(efa.matrices.full),
  color = rep("black", length(cor_df_list)),
  SIMPLIFY = FALSE
)

network_grid_from_df <- wrap_plots(network_plots_from_df, ncol = 3) +
  plot_layout() &
  theme(plot.margin = margin(t = 5, b = 20))

# Save the combined network plot grid
ggsave("figs/network_plots_grid_from_df.png", network_grid_from_df, width = 14, height = 14, dpi = 300)


##############################


library(igraph)
library(ggraph)
library(ggplot2)
library(ggrepel)
library(patchwork)
library(dplyr)
library(tibble)
library(purrr)

# -----------------------------------------------
# Convert both 4- and 3-factor sets into matrices
# -----------------------------------------------

# Function to convert correlation data to symmetric matrix
convert_to_matrix <- function(df) {
  df <- df %>% select(Factor1, Factor2, Correlation) %>% distinct()
  factors_all <- sort(unique(c(df$Factor1, df$Factor2)))
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))
  for (i in 1:nrow(df)) {
    f1 <- df$Factor1[i]
    f2 <- df$Factor2[i]
    val <- df$Correlation[i]  # use abs()
    mat[f1, f2] <- val
    mat[f2, f1] <- val
  }
  diag(mat) <- 1
  return(mat)
}

# Convert each group to matrices
efa.matrices.4f <- lapply(split(efa.inter4.df, efa.inter4.df$vaa), convert_to_matrix)
efa.matrices.3f <- lapply(split(efa.inter3.df, efa.inter3.df$vaa), convert_to_matrix)

# Combine into one full list
efa.matrices.full <- c(efa.matrices.4f, efa.matrices.3f)

# -----------------------------------------------
# Average correlations per plot and overall
# -----------------------------------------------

compute_avg_correlation <- function(mat) {
  vals <- abs(mat[lower.tri(mat)])
  mean(vals, na.rm = TRUE)
}

cor_avg_per_plot <- sapply(efa.matrices.full, compute_avg_correlation)
overall_avg_correlation <- round(mean(cor_avg_per_plot), 3)

# -----------------------------------------------
# Convert matrices to data frames
# -----------------------------------------------

cor_matrix_to_df <- function(cor_mat, dataset_name) {
  cor_df <- as.data.frame(as.table(cor_mat))
  cor_df$Dataset <- dataset_name
  colnames(cor_df) <- c("Factor1", "Factor2", "Correlation", "Dataset")
  cor_df <- cor_df[cor_df$Factor1 != cor_df$Factor2, ]
  return(cor_df)
}

cor_df_list <- lapply(names(efa.matrices.full), function(ds) {
  cor_matrix_to_df(efa.matrices.full[[ds]], ds)
})

# -----------------------------------------------
# Create network plots
# -----------------------------------------------

make_graph_plot_from_df <- function(cor_df, dataset_name, color, avg_corr) {
  cor_df <- cor_df %>%
    mutate(pair = pmap_chr(list(Factor1, Factor2), ~ paste(sort(c(.x, .y)), collapse = "-"))) %>%
    distinct(pair, .keep_all = TRUE) %>%
    select(-pair)
  
  graph <- graph_from_data_frame(cor_df, directed = FALSE)
  E(graph)$Correlation <- cor_df$Correlation
  layout <- create_layout(graph, layout = "kk")
  
  edge_df <- as.data.frame(get.edgelist(graph))
  colnames(edge_df) <- c("from", "to")
  edge_df$Correlation <- E(graph)$Correlation
  
  edge_df <- edge_df %>%
    left_join(layout[, c("name", "x", "y")], by = c("from" = "name")) %>%
    rename(x_from = x, y_from = y) %>%
    left_join(layout[, c("name", "x", "y")], by = c("to" = "name")) %>%
    rename(x_to = x, y_to = y) %>%
    mutate(x_mid = (x_from + x_to) / 2,
           y_mid = (y_from + y_to) / 2)
  
  # Add more space around the plot area
  xlim_range <- range(layout$x) + c(-0.3, 0.3)
  ylim_range <- range(layout$y) + c(-0.5, 0.3)
  
  p <- ggraph(layout) +
    geom_edge_link(aes(width = abs(Correlation),
                       color = ifelse(abs(Correlation) >= 0.5, "#45b6fe",
                                      ifelse(abs(Correlation) >= 0.3, "red", "gray"))),
                   alpha = 0.5) +
    geom_text_repel(data = edge_df,
                    aes(x = x_mid, y = y_mid, label = sprintf("%.2f", Correlation)),
                    size = 6, color = "black", segment.color = NA, max.overlaps = Inf) +
    geom_node_point(size = 16, color = color) +
    geom_node_text(aes(label = name), color = "white", size = 6) +
    
    # Adjust annotation: more padding, still bottom-right
    annotate("text", 
             x = xlim_range[2] - 0.1, 
             y = ylim_range[1] + 0.2, 
             label = paste0("Mean abs. corr. = ", round(avg_corr, 2)),
             hjust = 1, vjust = 1., size = 7, fontface = "italic") +
    
    xlim(xlim_range) +
    ylim(ylim_range) +
    
    scale_edge_width(limits = c(0, 0.6), range = c(0.5, 5)) +
    scale_edge_color_identity() +
    
    theme_void() +
    theme(
      plot.title = element_text(size = 22, face = "bold", margin = margin(b = 0), hjust = 0.5),
      #plot.margin = margin(t = 15, r = 15, b = 15, l = 15),
      legend.position = "none"
    ) +
    
    labs(title = dataset_name)
  
  return(p)
}

# Create plots
node_colors <- rep("black", length(efa.matrices.full))

network_plots <- mapply(
  FUN = make_graph_plot_from_df,
  cor_df = cor_df_list,
  dataset_name = names(efa.matrices.full),
  color = node_colors,
  avg_corr = cor_avg_per_plot[names(efa.matrices.full)],
  SIMPLIFY = FALSE
)

# -----------------------------------------------
# Combine plots
# -----------------------------------------------

network_grid <- wrap_plots(network_plots, ncol = 3) +
  plot_layout(guides = "collect") &
  theme(plot.margin = margin(t = 0, b = 10)) +
  plot_annotation(
    caption = paste0("Overall average inter-factor correlation: ", sprintf("%.2f", overall_avg_correlation)),
    theme = theme(plot.caption = element_text(hjust = 0, size = 10))
  )

# Combine plots
network_grid <- wrap_plots(network_plots, ncol = 3) +
  plot_layout(guides = "collect") +
  plot_annotation(
    caption = paste0("Red line = |inter-factor correlation| ≥ 0.3\nOverall mean absolute inter-factor correlation: ", sprintf("%.2f", overall_avg_correlation)),
    theme = theme(
      plot.caption = element_text(hjust = 0.5, size = 20, margin = margin(t = 20)),
      plot.margin = margin(b = 10, t = 0, r = 0, l = 0)  # Add extra bottom margin
    )
  )

# -----------------------------------------------
# Save output
# -----------------------------------------------

ggsave("figs/efa_network_plots_grid_with_all_avg_corr.png", network_grid,
       width = 16, height = 16, dpi = 300)

################################################################################
## INTER-FACTOR CORRELATIONS (IRT)
################################################################################

# Load data

general.nl.irt.inter <- read_excel("files/general_nl_irt_inter_correlations.xlsx")
general.fr.irt.inter <- read_excel("files/general_fr_irt_inter_correlations.xlsx")
youth.nl.irt.inter <- read_excel("files/youth_nl_irt_inter_correlations.xlsx")
youth.fr.irt.inter <- read_excel("files/youth_fr_irt_inter_correlations.xlsx")
brussels.nl.irt.inter <- read_excel("files/brussels_nl_irt_inter_correlations.xlsx")
brussels.fr.irt.inter <- read_excel("files/brussels_fr_irt_inter_correlations.xlsx")
flanders.nl.irt.inter <- read_excel("files/flanders_nl_irt_inter_correlations.xlsx")
wallonia.fr.irt.inter <- read_excel("files/wallonia_fr_irt_inter_correlations.xlsx")
federal.nl.irt.inter <- read_excel("files/federal_nl_irt_inter_correlations.xlsx")
federal.fr.irt.inter <- read_excel("files/federal_fr_irt_inter_correlations.xlsx")
eu.nl.irt.inter <- read_excel("files/eu_nl_irt_inter_correlations.xlsx")
eu.fr.irt.inter <- read_excel("files/eu_fr_irt_inter_correlations.xlsx")

irt.inter <- rbind(general.nl.irt.inter, general.fr.irt.inter, youth.nl.irt.inter,
                   youth.fr.irt.inter, brussels.nl.irt.inter, brussels.fr.irt.inter,
                   flanders.nl.irt.inter, wallonia.fr.irt.inter, federal.nl.irt.inter,
                   eu.nl.irt.inter, eu.fr.irt.inter)

# Add symmetric rows for full matrix representation
irt.inter.df <- irt.inter %>%
  bind_rows(
    irt.inter %>%
      rename(Factor1_tmp = Factor1, Factor2_tmp = Factor2) %>%
      transmute(vaa,
                Factor1 = Factor2_tmp,
                Factor2 = Factor1_tmp,
                Correlation)
  ) %>%
  arrange(vaa, Factor1, Factor2)

# View dataframe
print(irt.inter.df)

irt.inter3.df <- irt.inter.df %>% 
  filter(vaa %in% c("Brussels – Dutch", "Flanders",
                    "Wallonia", "EU – Dutch", "EU – French"))

irt.inter4.df <- irt.inter.df %>% 
  filter(vaa %in% c("General – Dutch", "General – French",
                    "Youth – Dutch", "Youth – French",
                    "Brussels – French", "Federal – Dutch",
                    "Federal – French"))

#irt.inter4.df$Correlation <- abs(irt.inter4.df$Correlation)

irt.inter4.df.list <- split(irt.inter4.df, irt.inter4.df$vaa)

irt.list <- as.list(irt.inter4.df.list)

# Convert vctrs_list_of to regular list (if needed)

irt.matrices <- lapply(irt.list, function(df) {
  df <- df %>%
    select(Factor1, Factor2, Correlation) %>%
    distinct()
  
  # Get full list of factors used
  factors_all <- sort(unique(c(df$Factor1, df$Factor2)))
  
  # Create an empty correlation matrix
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))
  
  # Fill matrix with correlations
  for (i in 1:nrow(df)) {
    f1 <- df$Factor1[i]
    f2 <- df$Factor2[i]
    val <- df$Correlation[i]
    mat[f1, f2] <- val
    mat[f2, f1] <- val  # Ensure symmetry
  }
  
  # Set diagonal to 1
  diag(mat) <- 1
  
  return(mat)
})

# --------------------------
# Dataset names and metadata
# --------------------------

datasets <- c("Brussels – French",
              "Federal – Dutch", "Federal – French",
              "General – Dutch", "General – French",
              "Youth – Dutch", "Youth – French")

# ------------------------------
# 2.1. Convert Correlation Matrices to DataFrame
# ------------------------------

# Function to convert a correlation matrix to a dataframe
cor_matrix_to_df <- function(cor_mat, dataset_name) {
  cor_df <- as.data.frame(as.table(cor_mat))  # Convert to long format
  cor_df$Dataset <- dataset_name  # Add dataset name
  colnames(cor_df) <- c("Factor1", "Factor2", "Correlation", "Dataset")
  cor_df <- cor_df[cor_df$Factor1 != cor_df$Factor2, ]  # Remove self-correlations
  return(cor_df)
}

# Apply the conversion function to each correlation matrix
cor_df_list <- lapply(names(irt.matrices), function(ds) {
  cor_matrix_to_df(irt.matrices[[ds]], ds)
})

# Combine into one dataframe
cor_df <- do.call(rbind, cor_df_list)

# -----------------------
# 4. Plot: Correlation Networks
# -----------------------

make_graph_plot_from_df <- function(cor_df, dataset_name, color) {
  # Remove duplicated edges like F1-F2 / F2-F1
  cor_df <- cor_df %>%
    mutate(pair = pmap_chr(list(Factor1, Factor2), ~ paste(sort(c(.x, .y)), collapse = "-"))) %>%
    distinct(pair, .keep_all = TRUE) %>%
    select(-pair)
  
  # Create graph
  graph <- graph_from_data_frame(cor_df, directed = FALSE)
  E(graph)$Correlation <- cor_df$Correlation
  layout <- ggraph::create_layout(graph, layout = "kk")
  
  # Prepare edge label positions
  edge_df <- as.data.frame(get.edgelist(graph))
  colnames(edge_df) <- c("from", "to")
  edge_df$Correlation <- E(graph)$Correlation
  
  edge_df <- edge_df %>%
    left_join(layout[, c("name", "x", "y")], by = c("from" = "name")) %>%
    rename(x_from = x, y_from = y) %>%
    left_join(layout[, c("name", "x", "y")], by = c("to" = "name")) %>%
    rename(x_to = x, y_to = y) %>%
    mutate(x_mid = (x_from + x_to) / 2,
           y_mid = (y_from + y_to) / 2)
  
  # Optional: add buffer to layout limits to prevent cut-off
  xlim_range <- range(layout$x) + c(-0.2, 0.2)
  ylim_range <- range(layout$y) + c(-0.2, 0.2)
  
  # Final plot
  p <- ggraph(layout, expand = TRUE) +
    geom_edge_link(aes(width = abs(Correlation),
                       color = ifelse(abs(Correlation) >= 0.5, "#45b6fe",
                                      ifelse(abs(Correlation) >= 0.3, "red", "grey"))),
                   alpha = 0.8) +
    
    geom_text_repel(data = edge_df,
                    aes(x = x_mid, y = y_mid, label = sprintf("%.2f", Correlation)),
                    size = 7, color = "black",
                    segment.color = NA, max.overlaps = Inf) +
    
    geom_node_point(size = 17, color = color) +
    geom_node_text(aes(label = name), color = "white", size = 5) +
    
    xlim(xlim_range) +
    ylim(ylim_range) +
    
    scale_edge_width(limits = c(0, 0.6), range = c(0.1, 4)) +
    scale_edge_color_identity() +
    scale_linetype_manual(values = c("dotted" = "dotted", "solid" = "solid")) +
    
    labs(title = dataset_name) +
    theme_void() +
    theme(
      plot.title = element_text(size = 24, margin = margin(b = 15)),
      plot.margin = margin(t = 40, r = 40, b = 40, l = 40),
      legend.position = "none"
    )
  
  return(p)
}

# Define colors per language for a bit of variation
node_colors <- rep(c("black"), length.out = length(datasets))

# Generate all 12 network plots
network_plots_from_df <- mapply(
  FUN = make_graph_plot_from_df,
  cor_df = cor_df_list,
  dataset_name = datasets,
  color = node_colors,
  SIMPLIFY = FALSE
)

# -----------------------
# 5. Combine Network Plots
# -----------------------

network_grid_from_df <- wrap_plots(network_plots_from_df, ncol = 3) +
  plot_layout() &
  theme(plot.margin = margin(t = 5, b = 20))

# -----------------------
# 6. (Optional) Save Outputs
# -----------------------

# Save the combined network plot grid
ggsave("figs/network_plots_grid_from_df.png", network_grid_from_df, width = 14, height = 14, dpi = 300)

datasets <- c("Brussels – French",
              "Federal – Dutch", "Federal – French",
              "General – Dutch", "General – French",
              "Youth – Dutch", "Youth – French",
              "Brussels – Dutch", "EU – Dutch", "EU – French", 
              "Flanders", "Wallonia")

#irt.inter3.df$Correlation <- abs(irt.inter3.df$Correlation)  # Apply abs() if needed

irt.inter3.df.list <- split(irt.inter3.df, irt.inter3.df$vaa)

irt.list.3f <- as.list(irt.inter3.df.list)

# Rebuild matrices for 3-factor sets
irt.matrices.3f <- lapply(irt.list.3f, function(df) {
  df <- df %>%
    select(Factor1, Factor2, Correlation) %>%
    distinct()
  
  factors_all <- sort(unique(c(df$Factor1, df$Factor2)))
  
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))
  
  for (i in 1:nrow(df)) {
    f1 <- df$Factor1[i]
    f2 <- df$Factor2[i]
    val <- df$Correlation[i]
    mat[f1, f2] <- val
    mat[f2, f1] <- val
  }
  
  diag(mat) <- 1
  return(mat)
})

# Merge both into one list
irt.matrices.full <- c(irt.matrices, irt.matrices.3f)

# Update cor_df_list and network_plots_from_df using the full matrix list
cor_df_list <- lapply(names(irt.matrices.full), function(ds) {
  cor_matrix_to_df(irt.matrices.full[[ds]], ds)
})

network_plots_from_df <- mapply(
  FUN = make_graph_plot_from_df,
  cor_df = cor_df_list,
  dataset_name = names(irt.matrices.full),
  color = rep("black", length(cor_df_list)),
  SIMPLIFY = FALSE
)

network_grid_from_df <- wrap_plots(network_plots_from_df, ncol = 3) +
  plot_layout() &
  theme(plot.margin = margin(t = 5, b = 20))

# Save the combined network plot grid
ggsave("figs/network_plots_grid_from_df.png", network_grid_from_df, width = 14, height = 14, dpi = 300)


##############################


library(igraph)
library(ggraph)
library(ggplot2)
library(ggrepel)
library(patchwork)
library(dplyr)
library(tibble)
library(purrr)

# -----------------------------------------------
# Convert both 4- and 3-factor sets into matrices
# -----------------------------------------------

# Function to convert correlation data to symmetric matrix
convert_to_matrix <- function(df) {
  df <- df %>% select(Factor1, Factor2, Correlation) %>% distinct()
  factors_all <- sort(unique(c(df$Factor1, df$Factor2)))
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))
  for (i in 1:nrow(df)) {
    f1 <- df$Factor1[i]
    f2 <- df$Factor2[i]
    val <- df$Correlation[i]  # use abs()
    mat[f1, f2] <- val
    mat[f2, f1] <- val
  }
  diag(mat) <- 1
  return(mat)
}

# Convert each group to matrices
irt.matrices.4f <- lapply(split(irt.inter4.df, irt.inter4.df$vaa), convert_to_matrix)
irt.matrices.3f <- lapply(split(irt.inter3.df, irt.inter3.df$vaa), convert_to_matrix)

# Combine into one full list
irt.matrices.full <- c(irt.matrices.4f, irt.matrices.3f)

# -----------------------------------------------
# Average correlations per plot and overall
# -----------------------------------------------

compute_avg_correlation <- function(mat) {
  vals <- abs(mat[lower.tri(mat)])
  mean(vals, na.rm = TRUE)
}

cor_avg_per_plot <- sapply(irt.matrices.full, compute_avg_correlation)
overall_avg_correlation <- round(mean(cor_avg_per_plot), 3)

# -----------------------------------------------
# Convert matrices to data frames
# -----------------------------------------------

cor_matrix_to_df <- function(cor_mat, dataset_name) {
  cor_df <- as.data.frame(as.table(cor_mat))
  cor_df$Dataset <- dataset_name
  colnames(cor_df) <- c("Factor1", "Factor2", "Correlation", "Dataset")
  cor_df <- cor_df[cor_df$Factor1 != cor_df$Factor2, ]
  return(cor_df)
}

cor_df_list <- lapply(names(irt.matrices.full), function(ds) {
  cor_matrix_to_df(irt.matrices.full[[ds]], ds)
})

# -----------------------------------------------
# Create network plots
# -----------------------------------------------

make_graph_plot_from_df <- function(cor_df, dataset_name, color, avg_corr) {
  cor_df <- cor_df %>%
    mutate(pair = pmap_chr(list(Factor1, Factor2), ~ paste(sort(c(.x, .y)), collapse = "-"))) %>%
    distinct(pair, .keep_all = TRUE) %>%
    select(-pair)
  
  graph <- graph_from_data_frame(cor_df, directed = FALSE)
  E(graph)$Correlation <- cor_df$Correlation
  layout <- create_layout(graph, layout = "kk")
  
  edge_df <- as.data.frame(get.edgelist(graph))
  colnames(edge_df) <- c("from", "to")
  edge_df$Correlation <- E(graph)$Correlation
  
  edge_df <- edge_df %>%
    left_join(layout[, c("name", "x", "y")], by = c("from" = "name")) %>%
    rename(x_from = x, y_from = y) %>%
    left_join(layout[, c("name", "x", "y")], by = c("to" = "name")) %>%
    rename(x_to = x, y_to = y) %>%
    mutate(x_mid = (x_from + x_to) / 2,
           y_mid = (y_from + y_to) / 2)
  
  # Add more space around the plot area
  xlim_range <- range(layout$x) + c(-0.3, 0.3)
  ylim_range <- range(layout$y) + c(-0.5, 0.3)
  
  p <- ggraph(layout) +
    geom_edge_link(aes(width = abs(Correlation),
                       color = ifelse(abs(Correlation) >= 0.5, "#45b6fe",
                                      ifelse(abs(Correlation) >= 0.3, "red", "gray"))),
                   alpha = 0.5) +
    geom_text_repel(data = edge_df,
                    aes(x = x_mid, y = y_mid, label = sprintf("%.2f", Correlation)),
                    size = 6, color = "black", segment.color = NA, max.overlaps = Inf) +
    geom_node_point(size = 16, color = color) +
    geom_node_text(aes(label = name), color = "white", size = 6) +
    
    # Adjust annotation: more padding, still bottom-right
    annotate("text", 
             x = xlim_range[2] - 0.1, 
             y = ylim_range[1] + 0.2, 
             label = paste0("Mean abs. corr. = ", round(avg_corr, 2)),
             hjust = 1, vjust = 1., size = 7, fontface = "italic") +
    
    xlim(xlim_range) +
    ylim(ylim_range) +
    
    scale_edge_width(limits = c(0, 0.6), range = c(0.5, 5)) +
    scale_edge_color_identity() +
    
    theme_void() +
    theme(
      plot.title = element_text(size = 22, face = "bold", margin = margin(b = 0), hjust = 0.5),
      #plot.margin = margin(t = 15, r = 15, b = 15, l = 15),
      legend.position = "none"
    ) +
    
    labs(title = dataset_name)
  
  return(p)
}

# Create plots
node_colors <- rep("black", length(irt.matrices.full))

network_plots <- mapply(
  FUN = make_graph_plot_from_df,
  cor_df = cor_df_list,
  dataset_name = names(irt.matrices.full),
  color = node_colors,
  avg_corr = cor_avg_per_plot[names(irt.matrices.full)],
  SIMPLIFY = FALSE
)

# -----------------------------------------------
# Combine plots
# -----------------------------------------------

network_grid <- wrap_plots(network_plots, ncol = 3) +
  plot_layout(guides = "collect") &
  theme(plot.margin = margin(t = 0, b = 10)) +
  plot_annotation(
    caption = paste0("Overall average inter-factor correlation: ", sprintf("%.2f", overall_avg_correlation)),
    theme = theme(plot.caption = element_text(hjust = 0, size = 10))
  )

# Combine plots
network_grid <- wrap_plots(network_plots, ncol = 3) +
  plot_layout(guides = "collect") +
  plot_annotation(
    caption = paste0("Red line = |inter-factor correlation| ≥ 0.3\nOverall mean absolute inter-factor correlation: ", sprintf("%.2f", overall_avg_correlation)),
    theme = theme(
      plot.caption = element_text(hjust = 0.5, size = 20, margin = margin(t = 20)),
      plot.margin = margin(b = 10, t = 0, r = 0, l = 0)  # Add extra bottom margin
    )
  )

# -----------------------------------------------
# Save output
# -----------------------------------------------

ggsave("figs/irt_network_plots_grid_with_all_avg_corr.png", network_grid,
       width = 16, height = 16, dpi = 300)